Sankey Plot for the 3 annotation modalities with 'Azimuth' as the source¶

In [1]:
import pandas as pd, numpy as np, scanpy as sc
import plotly
import plotly.express as px
from asctb_ct_label_mapper.utilities.nlp_preprocessing import execute_nlp_pipeline

plotly.offline.init_notebook_mode()




def fetch_and_parse_crosswalk_table(crosswalk_filename='Azimuth_CellTypist_PopV_Lung_ASCTB_Crosswalks.csv', raw_labels_column='raw_input_column', asctb_crosswalk_column='translation_column', verbose=False):
    """Processes the final-crosswalk data containing SME feedback for translating raw-labels into ASCTB naming convention.

    Args:
        crosswalk_filename (str, optional): Defaults to 'Azimuth_CellTypist_PopV_Lung_ASCTB_Crosswalks.csv'.
        raw_labels_column (str, optional): Defaults to 'raw_input_column'.
        asctb_crosswalk_column (str, optional): Defaults to 'translation_column'.
        verbose (bool, optional): Flag to indicate logging in verbose mode. Defaults to False.

    Returns:
        pd.DataFrame: Contains the final-crosswalk information.
    """
    # Merge the aggregated data with the translations file created using our ASCTB-Mapper package with finalized SME feedback
    crosswalk_df = pd.read_csv(crosswalk_filename)

    crosswalk_df['source'] = crosswalk_df['source'].replace('Azimuth-HLCAv2', 'azimuth').replace('PopV-Lung', 'popv').replace('CellTypist-Lung', 'celltypist')

    crosswalk_df[raw_labels_column] = crosswalk_df[raw_labels_column].str.lower().replace('φ','ï†')

    crosswalk_df['asctb_equivalent'] = crosswalk_df[asctb_crosswalk_column].replace('?', np.nan)
    crosswalk_df.loc[crosswalk_df['asctb_equivalent'].isna(), 'asctb_equivalent'] = crosswalk_df.loc[crosswalk_df['asctb_equivalent'].isna(), 'best_matched_asctb_label']
    crosswalk_df['asctb_equivalent'] = crosswalk_df['asctb_equivalent'].str.lower().replace('φ','ï†')
    return crosswalk_df



def get_crosswalk_translation_hmap(crosswalk_df):
    """Create a hashmap of Gloria's crosswalk for cell-type labels from Azimuth/CellTypist/PopV -> ASCTB naming conventions.

    Args:
        crosswalk_df (pd.DataFrame): DataFrame containing final crosswalk information. Essential columns: `[unique_cts, asctb_equivalent]`.
    """
    translation_hmap = dict(
        zip(
            crosswalk_df['unique_cts'], crosswalk_df['asctb_equivalent']
        )
    )
    return translation_hmap




def clean_and_translate_annotation(input_label):
    return ' '.join([execute_nlp_pipeline(word) for word in input_label.split()])




# Read and preprocess the Crosswalk table with SME Feedback to translate annotations -> ASCTB convention
crosswalk_df = fetch_and_parse_crosswalk_table(
    crosswalk_filename='ASCTB_Mapper/Azimuth_CellTypist_PopV_Lung_ASCTB_Crosswalks.csv',
    raw_labels_column='raw_input_label',
    asctb_crosswalk_column='Glorias_recco_for_ASCTB_Crosswalk',
    verbose=False
)


crosswalk_df = crosswalk_df.rename({'raw_input_label':'unique_cts'}, axis=1)
translation_hmap = get_crosswalk_translation_hmap(crosswalk_df)



ANNDATA_FOLDER = 'Datasets'
QUERY_DATASET_NAME = 'LCA'

# "LCA.h5ad" file is pre-annotated
predictions_adata = sc.read_h5ad(f'{ANNDATA_FOLDER}/{QUERY_DATASET_NAME}/{QUERY_DATASET_NAME}.h5ad')


# Get rid of the individual cell-level preds from PopV. Keep only majority_voting in obs dataframe
popv_lung_all_preds_df = pd.read_csv(f'{ANNDATA_FOLDER}/{QUERY_DATASET_NAME}/popv_preds.csv')
popv_lung_all_preds_df['popv_majority_vote_prediction_original'] = popv_lung_all_preds_df['popv_majority_vote_prediction']
for c in 'popv_prediction', 'popv_majority_vote_prediction':
    popv_lung_all_preds_df[c] = popv_lung_all_preds_df[c].apply(lambda x : translation_hmap.get(x.lower().replace('φ','ï†'), x))
popv_lung_preds_df = popv_lung_all_preds_df[['popv_majority_vote_prediction','popv_prediction_score']].rename({'popv_majority_vote_prediction':'raw_predicted_labels'}, axis=1)
popv_lung_preds_df['predicted_labels'] = popv_lung_preds_df['raw_predicted_labels']
predictions_adata.obs['popv_preds'] = popv_lung_preds_df['predicted_labels'].tolist()
predictions_adata.obs['popv_scores'] = popv_lung_preds_df['popv_prediction_score'].tolist()


# Get rid of the individual cell-level preds from CellTypist. Keep only majority_voting in obs dataframe
celltypist_lung_all_preds_df = pd.read_csv(f'{ANNDATA_FOLDER}/{QUERY_DATASET_NAME}/celltypist_preds.csv')
celltypist_lung_all_preds_df['majority_voting_original'] = celltypist_lung_all_preds_df['majority_voting']
for c in ['predicted_labels', 'majority_voting']:
    celltypist_lung_all_preds_df[c] = celltypist_lung_all_preds_df[c].apply(lambda x : translation_hmap.get(x.lower().replace('φ','ï†'), x))
celltypist_lung_preds_df = celltypist_lung_all_preds_df[['majority_voting','conf_score']].rename({'majority_voting':'raw_predicted_labels'}, axis=1)
celltypist_lung_preds_df['predicted_labels'] = celltypist_lung_preds_df['raw_predicted_labels']
predictions_adata.obs['celltypist_preds'] = celltypist_lung_preds_df['predicted_labels'].tolist()
predictions_adata.obs['celltypist_scores'] = celltypist_lung_preds_df['conf_score'].tolist()



# Merge in Azimuth predictions in obs dataframe. Keep only finest level predictions in obs dataframe.
azimuth_lung_all_preds_df = pd.read_csv(f'{ANNDATA_FOLDER}/{QUERY_DATASET_NAME}/azimuth_preds.tsv', sep=' ')
azimuth_lung_all_preds_df['predicted.ann_finest_level_original'] = azimuth_lung_all_preds_df['predicted.ann_finest_level']
for c in ['predicted.ann_level_1','predicted.ann_level_2','predicted.ann_level_3','predicted.ann_level_4','predicted.ann_level_5','predicted.ann_finest_level']:
    azimuth_lung_all_preds_df[c] = azimuth_lung_all_preds_df[c].apply(lambda x : translation_hmap.get(x.lower().replace('φ','ï†'), x))

azimuth_lung_preds_df = azimuth_lung_all_preds_df[['predicted.ann_finest_level', 'predicted.ann_finest_level.score']].rename({'predicted.ann_finest_level':'raw_predicted_labels'}, axis=1)
azimuth_lung_preds_df['predicted_labels'] = azimuth_lung_preds_df['raw_predicted_labels']
predictions_adata.obs['azimuth_preds'] = azimuth_lung_preds_df['predicted_labels'].tolist()
predictions_adata.obs['azimuth_scores'] = azimuth_lung_preds_df['predicted.ann_finest_level.score'].tolist()

predictions_adata.obs.columns
Out[1]:
Index(['method', 'donor', 'cell_ontology_type', 'donor_method',
       'cell_ontology_id', 'popv_preds', 'popv_scores', 'celltypist_preds',
       'celltypist_scores', 'azimuth_preds', 'azimuth_scores'],
      dtype='object')
In [ ]:
 

PopV-compartments by merging information from the source dataframes¶

We need to recreate the logic that the Tabula-Sapiens authors used to merge the 2 source datasets to create the merged LCA dataset.

Trying to retrace the indexes in the target dataframe, back to the source dataframe columns.

  1. Found that the index in the target LCA.obs df was cell.id+-1 in the 10x data.

  2. Found that the index in the target LCA.obs df was cell.id+-2 in the smartseq2 data.

All of this investigation effort is required because haven't yet received clarification from their team's developer, and they don't have this documented anywhere.

This entire trace-back activity is required so that we can pull the compartment column from the 2 source dataframes into our target LCA dataframe containing popv_preds and then show a breakdown of compartments.

In [2]:
lca_smart_seq2_name = 'LCA_Smart_seq2'
lca_10x_name = 'LCA_10x'

lca_smart_seq2_adata = sc.read_h5ad(f'{ANNDATA_FOLDER}/{lca_smart_seq2_name}/{lca_smart_seq2_name}.h5ad')
lca_10x_adata = sc.read_h5ad(f'{ANNDATA_FOLDER}/{lca_10x_name}/{lca_10x_name}.h5ad')

lca_smart_seq2_adata.obs.head(2).T
Out[2]:
index A1_B002014.gencode.vH29 A1_B003138.gencode.vH29
nGene 1724 2311
nReads 542313 1135638
plate.barcode B002014 B003138
cell.id A1_B002014.gencode.vH29 A1_B003138.gencode.vH29
region normal normal
label Ecpam, CD45 Ecpam, CD45
sorter ahmad/kyle/lolita ahmad/kyle/lolita
sort.location biohub biohub
sample medial 2 medial 2
location medial medial
donor_id 2 2
percent.ercc 0.098967 0.078368
percent.ribo 0.011503 0.026572
gating nan nan
free_annotation Capillary Aerocyte Capillary Aerocyte
Number of splices: Total 487297 957485
Number of splices: Annotated (sjdb) 487067 957386
Number of splices: GT-AG 482193 949158
Number of splices: GC-AG 3466 6707
Number of splices: AT-AC 277 304
Number of splices: Non-canonical 1361 1316
Mapping speed, Million of reads per hour 49.785 52.45
Average input read length 238.5 239.0
compartment endothelial endothelial
tissue_ontology_term_id UBERON:0002048 UBERON:0002048
disease_ontology_term_id PATO:0000461 PATO:0000461
development_stage_ontology_term_id HsapDv:0000140 HsapDv:0000140
assay_ontology_term_id EFO:0008931 EFO:0008931
cell_type_ontology_term_id CL:0000115 CL:0000115
self_reported_ethnicity_ontology_term_id unknown unknown
sex_ontology_term_id PATO:0000384 PATO:0000384
is_primary_data True True
organism_ontology_term_id NCBITaxon:9606 NCBITaxon:9606
suspension_type cell cell
cell_type endothelial cell endothelial cell
assay Smart-seq2 Smart-seq2
disease normal normal
organism Homo sapiens Homo sapiens
sex male male
tissue lung lung
self_reported_ethnicity unknown unknown
development_stage 46-year-old human stage 46-year-old human stage
In [3]:
lca_10x_adata.obs.head(2).T
Out[3]:
index P2_1_AAACCTGAGAAACCAT P2_1_AAATGCCAGATGAGAG
nGene 1347 1713
nUMI 2914 4226
channel P2_1 P2_1
region normal normal
percent.ribo 0.035347 0.061051
free_annotation Capillary Aerocyte Capillary Aerocyte
donor_id 2 2
sample distal 2 distal 2
location distal distal
magnetic.selection epithelial epithelial
preparation.site biohub biohub
compartment endothelial endothelial
tissue_ontology_term_id UBERON:0002048 UBERON:0002048
assay_ontology_term_id EFO:0009899 EFO:0009899
disease_ontology_term_id PATO:0000461 PATO:0000461
development_stage_ontology_term_id HsapDv:0000140 HsapDv:0000140
cell_type_ontology_term_id CL:0000115 CL:0000115
self_reported_ethnicity_ontology_term_id unknown unknown
sex_ontology_term_id PATO:0000384 PATO:0000384
is_primary_data True True
organism_ontology_term_id NCBITaxon:9606 NCBITaxon:9606
suspension_type cell cell
cell_type endothelial cell endothelial cell
assay 10x 3' v2 10x 3' v2
disease normal normal
organism Homo sapiens Homo sapiens
sex male male
tissue lung lung
self_reported_ethnicity unknown unknown
development_stage 46-year-old human stage 46-year-old human stage

Original 10x data seems to have obs['cell.id'] + '-1' as the index in the final merged LCA data

In [4]:
final_10x_indexes = predictions_adata.obs.loc[predictions_adata.obs['method']=='10X'].index.tolist()


original_10x_values = lca_10x_adata.obs.index.tolist()
original_10x_values = [x + '-1' for x in original_10x_values]

from asctb_ct_label_mapper.utilities.plotting import make_venn_diagram


try:
    fig = make_venn_diagram(
        A=set(final_10x_indexes),
        B=set(original_10x_values),
        labels=['Final 10x Indexes in our query-data', 'Original 10x Indexes in our source data'],
        title='Backtracking the indexes for the 10x Data: Everything should be just one set'
    )
except:
    pass

Original smartseq2 data seems to have obs['cell.id'] + '-2' as the index in the final merged LCA data

In [5]:
final_smartseq2_indexes = predictions_adata.obs.loc[predictions_adata.obs['method']=='smartseq2'].index.tolist()


original_smartseq2_values = lca_smart_seq2_adata.obs['cell.id'].values.tolist()
original_smartseq2_values = [x + '-2' for x in original_smartseq2_values]


from asctb_ct_label_mapper.utilities.plotting import make_venn_diagram


try:
    fig = make_venn_diagram(
        A=set(final_smartseq2_indexes),
        B=set(original_smartseq2_values),
        labels=['Final Smartseq2 Indexes in our query-data', 'Original Smartseq2 Indexes in our source data'],
        title='Backtracking the indexes for the Smartseq2 Data: Everything should be just one set'
    )
except:
    pass

Let's create the actual key-value store that maps pre-existing annotations to compartments in the raw-datasets¶

In [6]:
lca_10x_adata.obs['10x_index'] = [x + '-1' for x in lca_10x_adata.obs.index]
lca_10x_adata.obs['assay'] = ['10X' for x in lca_10x_adata.obs.index]


lca_smart_seq2_adata.obs['smart_seq2_index'] = [x + '-2' for x in lca_smart_seq2_adata.obs['cell.id'].tolist()]
lca_smart_seq2_adata.obs['assay'] = ['smartseq2' for x in lca_smart_seq2_adata.obs.index]


compartments_df = pd.concat(
    [
        lca_10x_adata.obs[['assay','cell_type','10x_index','compartment']].reset_index(drop=True).rename({'10x_index':'index'}, axis=1),
        lca_smart_seq2_adata.obs[['assay','cell_type','smart_seq2_index','compartment']].reset_index(drop=True).rename({'smart_seq2_index':'index'}, axis=1)
    ],
    axis=0
)
compartments_df
Out[6]:
assay cell_type index compartment
0 10X endothelial cell P2_1_AAACCTGAGAAACCAT-1 endothelial
1 10X endothelial cell P2_1_AAATGCCAGATGAGAG-1 endothelial
2 10X endothelial cell P2_1_AACACGTTCGATCCCT-1 endothelial
3 10X endothelial cell P2_1_AACACGTTCGCACTCT-1 endothelial
4 10X endothelial cell P2_1_AACCATGCAGCTCGCA-1 endothelial
... ... ... ... ...
9404 smartseq2 lung ciliated cell M5_B001771.gencode.vH29-2 epithelial
9405 smartseq2 lung ciliated cell N2_B001769.gencode.vH29-2 epithelial
9406 smartseq2 lung ciliated cell N2_B002460.gencode.vH29-2 epithelial
9407 smartseq2 lung ciliated cell O2_B001774.gencode.vH29-2 epithelial
9408 smartseq2 lung ciliated cell O7_B001774.gencode.vH29-2 epithelial

75071 rows × 4 columns

1. popv_preds have already been crosswalked to ASCT+B naming convention¶

2. Similarly perform crosswalk of these pre-existing annotations within the input dataset¶

3. Finally fetch the compartments for each of the popv_preds label.¶

In [7]:
mapping_compartments_df = compartments_df[['cell_type','compartment']].drop_duplicates().sort_values(by=['cell_type'])
mapping_compartments_df['cell_type_asctb_equivalent'] = mapping_compartments_df['cell_type'].apply(lambda x : translation_hmap.get(x.lower().replace('φ','ï†'), x))
mapping_compartments_df
Out[7]:
cell_type compartment cell_type_asctb_equivalent
9971 B cell immune b cell
9944 CD1c-positive myeloid dendritic cell immune CD1c-positive myeloid dendritic cell
10116 alveolar macrophage immune alveolar macrophage
32352 basal cell epithelial basal cell
26973 bronchial smooth muscle cell stromal bronchial smooth muscle cell
3237 capillary endothelial cell endothelial cap1 general capillary gcap
64536 ciliated cell epithelial ciliated cell
26224 classical monocyte immune classical monocyte
20465 club cell epithelial club cell
9714 dendritic cell immune migratory dendritic cell
7175 dendritic cell, human immune dendritic cell, human
16969 effector memory CD4-positive, alpha-beta T cell immune effector memory CD4-positive, alpha-beta T cell
22009 effector memory CD8-positive, alpha-beta T cell immune effector memory CD8-positive, alpha-beta T cell
0 endothelial cell endothelial endothelial cell
18276 endothelial cell of artery endothelial endothelial cell of artery
21648 endothelial cell of lymphatic vessel endothelial lymphatic endothelial cell
57177 epithelial cell epithelial epithelial cell
27978 fibroblast stromal fibroblast of lung
26516 intermediate monocyte immune classical monocyte
27231 lung ciliated cell epithelial lung ciliated cell
64624 lung goblet cell epithelial lung goblet cell
61811 lung neuroendocrine cell epithelial lung neuroendocrine cell
22423 lymphocyte immune lymphocyte
21902 macrophage immune macrophage
22482 mature NK T cell immune mature nk t cell
32016 megakaryocyte immune megakaryocyte
38508 mesothelial cell of pleura stromal mesothelial cell of pleura
25252 monocyte immune monocyte
20574 mucus secreting cell epithelial mucus secreting cell
9826 myeloid dendritic cell, human immune myeloid dendritic cell, human
25459 myeloid leukocyte immune myeloid leukocyte
28380 myofibroblast cell stromal myofibroblast cell
16891 naive thymus-derived CD4-positive, alpha-beta ... immune naive thymus-derived CD4-positive, alpha-beta ...
16378 naive thymus-derived CD8-positive, alpha-beta ... immune naive thymus-derived CD8-positive, alpha-beta ...
22574 natural killer cell immune natural killer cell
3040 neutrophil immune neutrophil
26704 non-classical monocyte immune non-classical monocyte
19244 pericyte stromal lung pericyte
32447 plasma cell immune plasma cell
9891 plasmacytoid dendritic cell, human immune plasmacytoid dendritic cell, human
38473 pulmonary interstitial fibroblast stromal pulmonary interstitial fibroblast
64002 pulmonary ionocyte epithelial pulmonary ionocyte
21467 respiratory basal cell epithelial respiratory basal cell
64024 tracheobronchial serous cell epithelial tracheobronchial serous cell
28503 type I pneumocyte epithelial type i pneumocyte
20932 type II pneumocyte epithelial type ii pneumocyte
20379 vascular associated smooth muscle cell stromal vascular smooth muscle cell
17474 vein endothelial cell endothelial pulmonary venous endothelial cell
In [8]:
popv_compartments_df = pd.merge(predictions_adata.obs['popv_preds'], mapping_compartments_df, how='left', left_on='popv_preds', right_on='cell_type_asctb_equivalent')
popv_compartments_df['compartment'] = popv_compartments_df['compartment'].tolist()


# Received Ellen's comments on 16th April.
# Manually imputing the compartment for these PopV-labels since they weren't present in the source datasets.
sme_feedback_for_compartments = {
    'cd8+ t cell naive':'immune',
    'cd4+ t cell naive':'immune',
    'bronchial goblet cell':'epithelial',
    'suprabasal cell':'epithelial',
    'plasmacytoid dendritic cell':'immune',
    'mesothelial cell':'stromal',
    'serous secreting cell of bronchus submucosal gland':'epithelial',
    'arterial endothelial cell':'endothelial',
    'smooth muscle cell':'stromal',
    'cd4+ t cell effector memory':'immune'
}

popv_compartments_df.loc[popv_compartments_df['compartment'].isna(), 'compartment'] = popv_compartments_df.loc[popv_compartments_df['compartment'].isna(), 'popv_preds'].apply(lambda x: sme_feedback_for_compartments[x])
popv_compartments_df = pd.DataFrame(popv_compartments_df.groupby(by=['popv_preds','compartment'])['popv_preds'].count())
popv_compartments_df.columns = ['count']
popv_compartments_df = popv_compartments_df.reset_index()

popv_compartments_df.sort_values(by=['compartment', 'count'], ascending=[False, False])
Out[8]:
popv_preds compartment count
12 fibroblast of lung stromal 2428
14 lung pericyte stromal 2125
4 bronchial smooth muscle cell stromal 1107
31 vascular smooth muscle cell stromal 571
18 mesothelial cell stromal 30
27 smooth muscle cell stromal 5
16 macrophage immune 15245
17 mature nk t cell immune 5289
8 cd8+ t cell naive immune 5172
9 classical monocyte immune 5100
7 cd4+ t cell naive immune 3911
20 neutrophil immune 2107
21 non-classical monocyte immune 988
1 b cell immune 845
19 migratory dendritic cell immune 630
22 plasma cell immune 203
23 plasmacytoid dendritic cell immune 139
6 cd4+ t cell effector memory immune 4
30 type ii pneumocyte epithelial 5125
13 lung ciliated cell epithelial 1977
10 club cell epithelial 1972
29 type i pneumocyte epithelial 1521
2 basal cell epithelial 1151
3 bronchial goblet cell epithelial 979
28 suprabasal cell epithelial 417
24 pulmonary ionocyte epithelial 81
26 serous secreting cell of bronchus submucosal g... epithelial 19
5 cap1 general capillary gcap endothelial 15160
11 endothelial cell of artery endothelial 1566
25 pulmonary venous endothelial cell endothelial 1240
15 lymphatic endothelial cell endothelial 503
0 arterial endothelial cell endothelial 11
In [9]:
import plotly.graph_objects as go

# Define the dropdown options
dropdown_options = [{'label': 'All compartments', 'value': 'all'},
                    {'label': 'Epithelial', 'value': 'epithelial'},
                    {'label': 'Endothelial', 'value': 'endothelial'},
                    {'label': 'Stromal', 'value': 'stromal'},
                    {'label': 'Immune', 'value': 'immune'}]

# Define the first trace with the aggregated counts
total_count = popv_compartments_df['count'].sum()
fig = go.Figure([
    go.Pie(
        values=popv_compartments_df['count'],
        labels=popv_compartments_df['compartment'],
        name='All compartments',
        hole=0.4,
    )
])


fig.update_layout(
    title='Breakdown of all compartments for PopV predicted labels',
    
)


fig.show()
In [10]:
import plotly.graph_objects as go

fig = go.Figure()

dropdown_options = [
#     {'label': 'All', 'value': 'all'},
    {'label': 'Endothelial', 'value': 'endothelial'},
    {'label': 'Epithelial', 'value': 'epithelial'},
    {'label': 'Immune', 'value': 'immune'},
    {'label': 'Stromal', 'value': 'stromal'},
]


fig = go.Figure()

# Define the first trace with the aggregated counts
# fig.add_trace(
#     go.Pie(
#         values=popv_compartments_df['count'],
#         labels=popv_compartments_df['compartment'],
#         name='all compartments',
#         hole=0.4,
#         visible=True,
#     )
# )



# Define the other traces for each compartment
for compartment in sorted(popv_compartments_df['compartment'].unique()):
    print(compartment)
    fig.add_trace(
        go.Pie(
            values=popv_compartments_df.loc[popv_compartments_df['compartment'] == compartment, 'count'],
            labels=popv_compartments_df.loc[popv_compartments_df['compartment'] == compartment, 'popv_preds'],
            name=f'Compartment {compartment}',
            hole=0.4,
            visible=False
        )
    )

    
    
    
# Update the layout with the dropdown
fig.update_layout(
    title=f'Breakdown of all compartments for PopV predicted labels in the {QUERY_DATASET_NAME} dataset',
    updatemenus=[
        dict(
            buttons=[
                dict(
                    label=dropdown_option['label'],
                    method='update',
                    args=[
                        {'visible':  #[dropdown_option['value']=='all']+
                             [dropdown_option['value'] in trace.name for trace in fig.data]},
                        {
                            'title': f'Breakdown of all compartments for PopV predicted labels in the {QUERY_DATASET_NAME} dataset' if dropdown_option['value']=='all'
                                else dropdown_option['label'] + 
                                     f' Compartment - {popv_compartments_df.loc[popv_compartments_df["compartment"] == dropdown_option["value"], "popv_preds"].nunique()}' + 
                                         f' unique Cell-Type annotations for PopV predicted labels in the {QUERY_DATASET_NAME} dataset'
                        }
                    ]
                )
                for dropdown_option in dropdown_options
            ]
        )
    ]
)


fig.show()
endothelial
epithelial
immune
stromal
In [12]:
import plotly.graph_objects as go

fig = go.Figure()

dropdown_options = [
    {'label': 'Overview of compartments', 'value': 'all'},
    {'label': 'Endothelial', 'value': 'endothelial'},
    {'label': 'Epithelial', 'value': 'epithelial'},
    {'label': 'Immune', 'value': 'immune'},
    {'label': 'Stromal', 'value': 'stromal'},
]


fig = go.Figure()

# Define the first trace with the aggregated counts
print('all')
fig.add_trace(
    go.Pie(
        values=popv_compartments_df['count'],
        labels=popv_compartments_df['compartment'],
        name='all compartments',
        hole=0.4,
        visible=True,
    )
)



# Define the other traces for each compartment
for compartment in sorted(popv_compartments_df['compartment'].unique()):
    print(compartment)
    fig.add_trace(
        go.Pie(
            values=popv_compartments_df.loc[popv_compartments_df['compartment'] == compartment, 'count'],
            labels=popv_compartments_df.loc[popv_compartments_df['compartment'] == compartment, 'popv_preds'],
            name=f'Compartment {compartment}',
            hole=0.4,
            visible=False
        )
    )

    
    
    
# Update the layout with the dropdown
fig.update_layout(
    title=f'Breakdown of all compartments for PopV predicted labels in the {QUERY_DATASET_NAME} dataset',
    updatemenus=[
        dict(
            buttons=[
                dict(
                    label=dropdown_option['label'],
                    method='update',
                    args=[
                        {'visible':  
                             [dropdown_option['value'] in trace.name for trace in fig.data]},
                        {
                            'title': f'Breakdown of all compartments for PopV predicted labels in the {QUERY_DATASET_NAME} dataset' if dropdown_option['value']=='all'
                                else dropdown_option['label'] + 
                                     f' Compartment - {popv_compartments_df.loc[popv_compartments_df["compartment"] == dropdown_option["value"], "popv_preds"].nunique()}' + 
                                         f' unique Cell-Type annotations for PopV predicted labels in the {QUERY_DATASET_NAME} dataset'
                        }
                    ]
                )
                for dropdown_option in dropdown_options
            ]
        )
    ]
)

fig.show()
all
endothelial
epithelial
immune
stromal
In [ ]:
 
In [ ]: